/*
  author Sylvain Bertrand <sylvain.bertrand@gmail.com>
  Protected by linux GNU GPLv2
  Copyright 2012-2014
*/
#include <linux/pci.h>
#include <asm/byteorder.h>
#include <linux/delay.h>
#include <linux/cdev.h>
#include <linux/vmalloc.h>

#include <alga/alga.h>
#include <alga/rng_mng.h>
#include <alga/timing.h>
#include <uapi/alga/pixel_fmts.h>
#include <uapi/alga/amd/dce6/dce6.h>

#include "mc.h"
#include "ih.h"
#include "rlc.h"
#include "fence.h"
#include "ring.h"
#include "dmas.h"
#include "ba.h"
#include "cps.h"
#include "gpu.h"
#include "drv.h"

#include "regs.h"

#include "ba_private.h"

/*
 * Point every pte of the mapping m back at the dummy page, so the gpu range
 * no longer references the dma memory about to be released.
 */
static void ptes_restore(struct pci_dev *dev, struct ba_map *m)
{
	struct dev_drv_data *dd;
	u64 dummy_pte;
	u64 pte_off;
	u64 idx;

	dd = pci_get_drvdata(dev);

	dummy_pte = dd->ba.dummy_bus_addr | PTE_VALID | PTE_SYSTEM
				| PTE_SNOOPED | PTE_READABLE | PTE_WRITEABLE;

	for (idx = 0; idx < m->ptes_n; ++idx) {
		pte_off = m->ptes_start + idx * PTE_SZ;
		/* ptes are stored as 64 bits little endian in vram */
		vram_w32(dev, lower_32_bits(dummy_pte), pte_off);
		vram_w32(dev, upper_32_bits(dummy_pte), pte_off + sizeof(u32));
	}
}

/*
 * Tear down a core coherent mapping: optionally restore its ptes to the
 * dummy page, release its gpu aperture range and its dma memory, then free
 * the ba_map itself.
 */
void core_coherent_cleanup(struct pci_dev *dev, struct ba_map *m, u8 flgs)
{
	struct dev_drv_data *dd = pci_get_drvdata(dev);

	/* unless the caller asked to skip it, put the dummy page back */
	if ((flgs & BA_NO_PT_UPDATE) == 0)
		ptes_restore(dev, m);

	/* give back the aperture range, then the cpu side dma memory */
	rng_free(&dd->ba.mng, m->gpu_addr);
	dma_free_coherent(&dev->dev, m->ptes_n * GPU_PAGE_SZ, m->cpu_addr,
								m->bus_addr);
	kfree(m);
}

/*
 * Write the ptes for a contiguous coherent mapping through the mmio regs.
 * Only used for the core mapping: at that point no ring is mapped yet, so
 * ring commands cannot be used to install the ptes.
 */
static void ptes_coherent_contig_install(struct pci_dev *dev, struct ba_map *m)
{
	struct dev_drv_data *dd;
	u64 first_pte_idx;
	u64 cur_pte;
	u64 cur_bus_addr;
	u64 i;

	dd = pci_get_drvdata(dev);

	/* locate the first pte of this mapping inside the page table */
	first_pte_idx = GPU_PAGE_IDX(m->gpu_addr - dd->ba.mng.s);
	m->ptes_start = dd->ba.pt_start + first_pte_idx * PTE_SZ;

	cur_pte = m->ptes_start;
	cur_bus_addr = m->bus_addr;
	for (i = 0; i < m->ptes_n; ++i) {
		pte_mmio_regs_install(dev, cur_pte, cur_bus_addr);
		cur_pte += PTE_SZ;
		cur_bus_addr += GPU_PAGE_SZ;
	}
}

/*
 * Map gpu_ps_n contiguous gpu pages backed by a single dma coherent
 * allocation. On success the mapping is accounted in dd->ba.maps and made
 * live on the gpu (ptes installed + tlb flushed), and the new ba_map is
 * returned. Returns NULL on failure, with all partially acquired resources
 * released.
 */
static struct ba_map *map_coherent_contig(struct pci_dev *dev, u64 gpu_ps_n)
{
	struct ba_map *m;
	u64 sz;
	long r;
	struct dev_drv_data *dd;

	dd = pci_get_drvdata(dev);

	m = kzalloc(sizeof(*m), GFP_KERNEL);
	if (m == NULL) {
		dev_err(&dev->dev, "ba:unable to allocate memory for coherent mapping\n");
		goto err;
	}

	m->type = BA_MAP_COHERENT_CONTIG;
	m->ptes_n = gpu_ps_n;

	sz = m->ptes_n * GPU_PAGE_SZ;

	/* allocate a range of the aperture */
	r = rng_alloc_align(&m->gpu_addr, &dd->ba.mng, sz, GPU_PAGE_SZ);
	if (r == -ALGA_ERR) {
		dev_err(&dev->dev, "ba:unable to allocate gpu address space for coherent mapping\n");
		goto err_free_ba_map;
	}

	/* do the coherent mapping */
	m->cpu_addr = dma_zalloc_coherent(&dev->dev, sz, &m->bus_addr,
								GFP_KERNEL);
	if (m->cpu_addr == NULL) {
		dev_err(&dev->dev, "ba:unable to perform coherent mapping\n");
		goto err_free_rng;
	}

	/* install the ptes for this mapping */
	ptes_coherent_contig_install(dev, m);

	/* account for this new mapping */
	list_add(&m->n, &dd->ba.maps);

	/* be sure the gpu pte updates were sent over the bus */
	wmb();

	/* flush tlb to make live the mapping on the gpu */
	tlb_flush(dev);
	return m;

err_free_rng:
	rng_free(&dd->ba.mng, m->gpu_addr);
err_free_ba_map:
	kfree(m);
err:
	return NULL;
}
/*
 * Just a helper for the core mappings, which have only the mmio regs
 * available to install the ptes. They cannot use a ring command since no
 * ring is mapped yet.
 */
long core_coherent_map(struct pci_dev *dev, u64 sz, struct ba_map **m)
{
	struct ba_map *map;

	/* the gpu maps whole pages only */
	if (sz % GPU_PAGE_SZ != 0) {
		dev_err(&dev->dev, "ba:core_coherent:size not aligned on gpu page size\n");
		return -BA_ERR;
	}

	map = map_coherent_contig(dev, GPU_PAGE_IDX(sz));
	*m = map;
	if (map == NULL) {
		dev_err(&dev->dev, "ba:core_coherent:unable to dma map the aperture\n");
		return -BA_ERR;
	}

	dev_info(&dev->dev, "ba:core_coherent:aperture mapped gpu_addr=0x%016llx cpu_addr=0x%p ptes_n=0x%016llx\n",
				map->gpu_addr, map->cpu_addr, map->ptes_n);
	return 0;
}
